library(readxl)
library(xlsx)
library(sjPlot)
library(ggplot2)
library(lme4)
## Loading required package: Matrix
library(stringr)
library(ggExtra)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Linguistic data: resolve the script directory from the file open in the
# RStudio editor when RStudio is available, otherwise fall back to the
# working directory.
currdir <- if (rstudioapi::isAvailable()) {
  dirname(rstudioapi::getSourceEditorContext()$path)
} else {
  getwd()
}
# The workbook sits in the data/ folder two directory levels above currdir.
file_path <- file.path(dirname(dirname(currdir)), "data/extracted_data_3.xlsx")
data <- read_excel(file_path)

# Handling data: tag every row with its agent ("H" when conv == 1, "R"
# otherwise) and drop the rows of locutors 1, 4, 19 and 23.
data$Agent <- ifelse(data$conv == 1, "H", "R")
data <- data[!(data$locutor %in% c(1, 4, 19, 23)), ]

# Adding / renaming columns: Trial copies conv_id_unif; Trial2 is "t" followed
# by the trial left-padded with zeros to a width of two.
data$Trial <- data$conv_id_unif
data$Trial2 <- paste0("t", str_pad(data$Trial, 2, pad = "0"))
# Preallocated table of descriptive statistics: one zero-filled row per
# <feature>_part / <feature>_conv pair, six numeric columns (overall, "R" and
# "H" mean and standard deviation). Extra columns will add themselves
# automatically when assigned later - this only creates the structure.
df2 <- data.frame(
  mean = numeric(26),
  std = numeric(26),
  mean_r = numeric(26),
  std_r = numeric(26),
  mean_h = numeric(26),
  std_h = numeric(26),
  row.names = c(
    "lexical_richness_part", "lexical_richness_conv",
    "linguistic_complexity_part", "linguistic_complexity_conv",
    "content_complexity_part", "content_complexity_conv",
    "ratio_silence_lgth_part", "ratio_silence_lgth_conv",
    "sum_ipu_lgth_part", "sum_ipu_lgth_conv",
    "qt_discourse_part", "qt_discourse_conv",
    "qt_feedback_part", "qt_feedback_conv",
    "qt_filled_pause_part", "qt_filled_pause_conv",
    "ratio_discourse_part", "ratio_discourse_conv",
    "nratio_feedback_part", "nratio_feedback_conv",
    "ratio_filled_pause_part", "ratio_filled_pause_conv",
    "mean_ipu_lgth_part", "mean_ipu_lgth_conv",
    "speech_rate_min4_part", "speech_rate_min4_conv"
  ),
  stringsAsFactors = FALSE
)
lexical_richness
# Density-scaled histogram of lexical_richness with an overlaid density curve,
# coloured by Agent, one facet row per tier.
ggplot(data, aes(x = lexical_richness, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = 0.2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of lexical_richness per trial label, coloured by Agent.
ggplot(data, aes(x = Trial2, y = lexical_richness, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Bars at the per-Agent mean with error bars spanning mean +/- one standard
# deviation, one panel per tier.
ggplot(
  data,
  aes(x = Agent, fill = Agent, y = lexical_richness)
) +
  stat_summary(fun.y = mean, geom = "bar") +
  stat_summary(
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x),
    geom = "errorbar",
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "lexical_richness")

# Merged data: take lexical_richness from the conversant rows (data_conv) and
# from the participant rows (data_part), then join both on locutor, Trial and
# Agent.
temp1 <- subset(
  data,
  subset = tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "lexical_richness")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  subset = tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "lexical_richness")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))

# Mixed model: data_part on data_conv, Agent, their interaction and Trial,
# with a random intercept and random Trial slope for each locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
## boundary (singular) fit: see ?isSingular
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: -1260.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8290 -0.6602 -0.0551 0.6464 6.9743
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 7.326e-04 2.707e-02
## Trial 8.066e-09 8.981e-05 1.00
## Residual 4.210e-03 6.489e-02
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.150329 0.016123 9.324
## data_conv 0.178702 0.077603 2.303
## AgentR 0.011170 0.018854 0.592
## Trial 0.002416 0.000856 2.822
## data_conv:AgentR -0.054478 0.092948 -0.586
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.847
## AgentR -0.686 0.706
## Trial -0.369 0.105 0.188
## dt_cnv:AgnR 0.741 -0.841 -0.939 -0.185
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Render the sjPlot summary table of the fitted model, titled with the feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", "lexical_richness")
)
part ~ conv lexical_richness
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
0.15
|
0.12 – 0.18
|
<0.001
|
|
data_conv
|
0.18
|
0.03 – 0.33
|
0.021
|
|
Agent [R]
|
0.01
|
-0.03 – 0.05
|
0.554
|
|
Trial
|
0.00
|
0.00 – 0.00
|
0.005
|
|
data_conv * Agent [R]
|
-0.05
|
-0.24 – 0.13
|
0.558
|
|
Random Effects
|
|
σ2
|
0.00
|
|
τ00 locutor
|
0.00
|
|
τ11 locutor.Trial
|
0.00
|
|
ρ01 locutor
|
1.00
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.048 / NA
|
# Saving data: fixed-effect coefficient table of the fitted model, tagged with
# the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "lexical_richness"
# Profile confidence intervals for the fixed effects only. Requesting them by
# kind (parm = "beta_") replaces the positional slice [5:9, ], which silently
# returns the wrong rows whenever the number of variance parameters or of
# predictors in the model changes.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
# First feature: this starts the accumulated results table.
df_overall <- cbind(s, l)

# Saving other features: mean and standard deviation of both tiers, over all
# rows and separately for the "R" and "H" agents.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  stat_row <- paste0("lexical_richness_", pc)
  value_col <- paste0("data_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Building block shared by the scatterplot: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(
  merres,
  aes(x = data_conv, y = data_part, color = Agent)
) +
  theme_bw()

# Coloured scatterplot with a linear regression line per Agent.
basic_plot +
  geom_point(alpha = 0.3, size = 0.9) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: lexical_richness Conv",
    y = "VD: lexical_richness Part",
    color = "Agent"
  )

# Second plot: scatter with 2D density contours, both axes running from 0 to
# the observed maximum, plus marginal density curves per Agent.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: lexical_richness Conv",
    y = "VD: lexical_richness Part",
    color = "Agent"
  )
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

linguistic_complexity
# Density-scaled histogram of linguistic_complexity with an overlaid density
# curve, coloured by Agent, one facet row per tier.
ggplot(data, aes(x = linguistic_complexity, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = 0.2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of linguistic_complexity per trial label, coloured by Agent.
ggplot(data, aes(x = Trial2, y = linguistic_complexity, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Bars at the per-Agent mean with error bars spanning mean +/- one standard
# deviation, one panel per tier.
ggplot(
  data,
  aes(x = Agent, fill = Agent, y = linguistic_complexity)
) +
  stat_summary(fun.y = mean, geom = "bar") +
  stat_summary(
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x),
    geom = "errorbar",
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "linguistic_complexity")

# Merged data: take linguistic_complexity from the conversant rows (data_conv)
# and from the participant rows (data_part), then join both on locutor, Trial
# and Agent.
temp1 <- subset(
  data,
  subset = tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "linguistic_complexity")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  subset = tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "linguistic_complexity")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))

# Mixed model: data_part on data_conv, Agent, their interaction and Trial,
# with a random intercept and random Trial slope for each locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: -1388.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3545 -0.6236 -0.0275 0.5946 4.1871
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 6.400e-04 0.025298
## Trial 1.136e-06 0.001066 -0.59
## Residual 3.267e-03 0.057159
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.2680245 0.0260919 10.272
## data_conv 0.1638249 0.0723663 2.264
## AgentR 0.0565537 0.0303653 1.862
## Trial 0.0010692 0.0007787 1.373
## data_conv:AgentR -0.1415131 0.0908802 -1.557
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.955
## AgentR -0.812 0.833
## Trial -0.072 -0.117 -0.097
## dt_cnv:AgnR 0.756 -0.791 -0.982 0.092
# Render the sjPlot summary table of the fitted model, titled with the feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", "linguistic_complexity")
)
part ~ conv linguistic_complexity
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
0.27
|
0.22 – 0.32
|
<0.001
|
|
data_conv
|
0.16
|
0.02 – 0.31
|
0.024
|
|
Agent [R]
|
0.06
|
-0.00 – 0.12
|
0.063
|
|
Trial
|
0.00
|
-0.00 – 0.00
|
0.170
|
|
data_conv * Agent [R]
|
-0.14
|
-0.32 – 0.04
|
0.119
|
|
Random Effects
|
|
σ2
|
0.00
|
|
τ00 locutor
|
0.00
|
|
τ11 locutor.Trial
|
0.00
|
|
ρ01 locutor
|
-0.59
|
|
ICC
|
0.14
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.016 / 0.150
|
# Saving data: fixed-effect coefficient table of the fitted model, tagged with
# the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "linguistic_complexity"
# Profile confidence intervals for the fixed effects only. Requesting them by
# kind (parm = "beta_") replaces the positional slice [5:9, ], which silently
# returns the wrong rows whenever the number of variance parameters or of
# predictors in the model changes.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
# Append this feature's rows to the accumulated results table.
df_overall <- rbind(df_overall, cbind(s, l))

# Saving other features: mean and standard deviation of both tiers, over all
# rows and separately for the "R" and "H" agents.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  stat_row <- paste0("linguistic_complexity_", pc)
  value_col <- paste0("data_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Building block shared by the scatterplot: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(
  merres,
  aes(x = data_conv, y = data_part, color = Agent)
) +
  theme_bw()

# Coloured scatterplot with a linear regression line per Agent.
basic_plot +
  geom_point(alpha = 0.3, size = 0.9) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: linguistic_complexity Conv",
    y = "VD: linguistic_complexity Part",
    color = "Agent"
  )

# Second plot: scatter with 2D density contours, both axes running from 0 to
# the observed maximum, plus marginal density curves per Agent.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: linguistic_complexity Conv",
    y = "VD: linguistic_complexity Part",
    color = "Agent"
  )
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

content_complexity
# Density-scaled histogram of content_complexity with an overlaid density
# curve, coloured by Agent, one facet row per tier.
ggplot(data, aes(x = content_complexity, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = 0.2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of content_complexity per trial label, coloured by Agent.
ggplot(data, aes(x = Trial2, y = content_complexity, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Bars at the per-Agent mean with error bars spanning mean +/- one standard
# deviation, one panel per tier.
ggplot(
  data,
  aes(x = Agent, fill = Agent, y = content_complexity)
) +
  stat_summary(fun.y = mean, geom = "bar") +
  stat_summary(
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x),
    geom = "errorbar",
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "content_complexity")

# Merged data: take content_complexity from the conversant rows (data_conv)
# and from the participant rows (data_part), then join both on locutor, Trial
# and Agent.
temp1 <- subset(
  data,
  subset = tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "content_complexity")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  subset = tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "content_complexity")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))

# Mixed model: data_part on data_conv, Agent, their interaction and Trial,
# with a random intercept and random Trial slope for each locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
## boundary (singular) fit: see ?isSingular
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: -1620.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1014 -0.6520 -0.0157 0.5953 3.2123
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 1.607e-04 0.0126759
## Trial 1.061e-07 0.0003257 -1.00
## Residual 2.118e-03 0.0460196
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.2300966 0.0174530 13.184
## data_conv 0.1075481 0.0751409 1.431
## AgentR 0.0289939 0.0228220 1.270
## Trial -0.0006037 0.0006002 -1.006
## data_conv:AgentR -0.0966974 0.0936795 -1.032
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.955
## AgentR -0.710 0.732
## Trial -0.191 -0.014 -0.065
## dt_cnv:AgnR 0.757 -0.802 -0.980 0.061
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Render the sjPlot summary table of the fitted model, titled with the feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", "content_complexity")
)
part ~ conv content_complexity
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
0.23
|
0.20 – 0.26
|
<0.001
|
|
data_conv
|
0.11
|
-0.04 – 0.25
|
0.152
|
|
Agent [R]
|
0.03
|
-0.02 – 0.07
|
0.204
|
|
Trial
|
-0.00
|
-0.00 – 0.00
|
0.314
|
|
data_conv * Agent [R]
|
-0.10
|
-0.28 – 0.09
|
0.302
|
|
Random Effects
|
|
σ2
|
0.00
|
|
τ00 locutor
|
0.00
|
|
τ11 locutor.Trial
|
0.00
|
|
ρ01 locutor
|
-1.00
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.014 / NA
|
# Saving data: fixed-effect coefficient table of the fitted model, tagged with
# the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "content_complexity"
# Profile confidence intervals for the fixed effects only. Requesting them by
# kind (parm = "beta_") replaces the positional slice [5:9, ], which silently
# returns the wrong rows whenever the number of variance parameters or of
# predictors in the model changes.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
# Append this feature's rows to the accumulated results table.
df_overall <- rbind(df_overall, cbind(s, l))

# Saving other features: mean and standard deviation of both tiers, over all
# rows and separately for the "R" and "H" agents.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  stat_row <- paste0("content_complexity_", pc)
  value_col <- paste0("data_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Building block shared by the scatterplot: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(
  merres,
  aes(x = data_conv, y = data_part, color = Agent)
) +
  theme_bw()

# Coloured scatterplot with a linear regression line per Agent.
basic_plot +
  geom_point(alpha = 0.3, size = 0.9) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: content_complexity Conv",
    y = "VD: content_complexity Part",
    color = "Agent"
  )

# Second plot: scatter with 2D density contours, both axes running from 0 to
# the observed maximum, plus marginal density curves per Agent.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: content_complexity Conv",
    y = "VD: content_complexity Part",
    color = "Agent"
  )
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

ratio_silence_lgth
# Density-scaled histogram of ratio_silence_lgth with an overlaid density
# curve, coloured by Agent, one facet row per tier.
ggplot(data, aes(x = ratio_silence_lgth, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = 0.2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of ratio_silence_lgth per trial label, coloured by Agent.
ggplot(data, aes(x = Trial2, y = ratio_silence_lgth, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Bars at the per-Agent mean with error bars spanning mean +/- one standard
# deviation, one panel per tier.
ggplot(
  data,
  aes(x = Agent, fill = Agent, y = ratio_silence_lgth)
) +
  stat_summary(fun.y = mean, geom = "bar") +
  stat_summary(
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x),
    geom = "errorbar",
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "ratio_silence_lgth")

# Merged data: take ratio_silence_lgth from the conversant rows (data_conv)
# and from the participant rows (data_part), then join both on locutor, Trial
# and Agent.
temp1 <- subset(
  data,
  subset = tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "ratio_silence_lgth")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  subset = tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "ratio_silence_lgth")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))

# Mixed model: data_part on data_conv, Agent, their interaction and Trial,
# with a random intercept and random Trial slope for each locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: -1307
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2753 -0.6165 -0.0025 0.6903 2.8192
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 1.047e-02 0.102319
## Trial 9.878e-06 0.003143 -0.53
## Residual 3.395e-03 0.058267
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.017570 0.033642 30.247
## data_conv -0.533077 0.040433 -13.184
## AgentR 0.364294 0.060372 6.034
## Trial -0.005688 0.001022 -5.567
## data_conv:AgentR -0.275514 0.075843 -3.633
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.730
## AgentR -0.176 0.215
## Trial -0.299 -0.039 0.069
## dt_cnv:AgnR 0.264 -0.344 -0.987 -0.061
# Render the sjPlot summary table of the fitted model, titled with the feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", "ratio_silence_lgth")
)
part ~ conv ratio_silence_lgth
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
1.02
|
0.95 – 1.08
|
<0.001
|
|
data_conv
|
-0.53
|
-0.61 – -0.45
|
<0.001
|
|
Agent [R]
|
0.36
|
0.25 – 0.48
|
<0.001
|
|
Trial
|
-0.01
|
-0.01 – -0.00
|
<0.001
|
|
data_conv * Agent [R]
|
-0.28
|
-0.42 – -0.13
|
<0.001
|
|
Random Effects
|
|
σ2
|
0.00
|
|
τ00 locutor
|
0.01
|
|
τ11 locutor.Trial
|
0.00
|
|
ρ01 locutor
|
-0.53
|
|
ICC
|
0.73
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.229 / 0.789
|
# Saving data: fixed-effect coefficient table of the fitted model, tagged with
# the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "ratio_silence_lgth"
# Profile confidence intervals for the fixed effects only. Requesting them by
# kind (parm = "beta_") replaces the positional slice [5:9, ], which silently
# returns the wrong rows whenever the number of variance parameters or of
# predictors in the model changes.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
# Append this feature's rows to the accumulated results table.
df_overall <- rbind(df_overall, cbind(s, l))

# Saving other features: mean and standard deviation of both tiers, over all
# rows and separately for the "R" and "H" agents.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  stat_row <- paste0("ratio_silence_lgth_", pc)
  value_col <- paste0("data_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Building block shared by the scatterplot: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(
  merres,
  aes(x = data_conv, y = data_part, color = Agent)
) +
  theme_bw()

# Coloured scatterplot with a linear regression line per Agent.
basic_plot +
  geom_point(alpha = 0.3, size = 0.9) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: ratio_silence_lgth Conv",
    y = "VD: ratio_silence_lgth Part",
    color = "Agent"
  )

# Second plot: scatter with 2D density contours, both axes running from 0 to
# the observed maximum, plus marginal density curves per Agent.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: ratio_silence_lgth Conv",
    y = "VD: ratio_silence_lgth Part",
    color = "Agent"
  )
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

sum_ipu_lgth
# Density-scaled histogram of sum_ipu_lgth with an overlaid density curve,
# coloured by Agent, one facet row per tier.
ggplot(data, aes(x = sum_ipu_lgth, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = 0.2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of sum_ipu_lgth per trial label, coloured by Agent.
ggplot(data, aes(x = Trial2, y = sum_ipu_lgth, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Bars at the per-Agent mean with error bars spanning mean +/- one standard
# deviation, one panel per tier.
ggplot(
  data,
  aes(x = Agent, fill = Agent, y = sum_ipu_lgth)
) +
  stat_summary(fun.y = mean, geom = "bar") +
  stat_summary(
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x),
    geom = "errorbar",
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "sum_ipu_lgth")

# Merged data: take sum_ipu_lgth from the conversant rows (data_conv) and from
# the participant rows (data_part), then join both on locutor, Trial and
# Agent.
temp1 <- subset(
  data,
  subset = tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "sum_ipu_lgth")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  subset = tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "sum_ipu_lgth")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))

# Mixed model: data_part on data_conv, Agent, their interaction and Trial,
# with a random intercept and random Trial slope for each locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: 2778.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.8192 -0.6903 0.0025 0.6165 3.2753
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 36.44323 6.0368
## Trial 0.03439 0.1854 -0.53
## Residual 11.81799 3.4377
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 30.41488 1.64959 18.438
## data_conv -0.53308 0.04043 -13.184
## AgentR -5.23801 1.11255 -4.708
## Trial 0.33559 0.06028 5.567
## data_conv:AgentR -0.27551 0.07584 -3.633
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.568
## AgentR -0.411 0.698
## Trial -0.417 0.039 -0.025
## dt_cnv:AgnR 0.181 -0.344 -0.861 0.061
# Render the sjPlot summary table of the fitted model, titled with the feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", "sum_ipu_lgth")
)
part ~ conv sum_ipu_lgth
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
30.41
|
27.18 – 33.65
|
<0.001
|
|
data_conv
|
-0.53
|
-0.61 – -0.45
|
<0.001
|
|
Agent [R]
|
-5.24
|
-7.42 – -3.06
|
<0.001
|
|
Trial
|
0.34
|
0.22 – 0.45
|
<0.001
|
|
data_conv * Agent [R]
|
-0.28
|
-0.42 – -0.13
|
<0.001
|
|
Random Effects
|
|
σ2
|
11.82
|
|
τ00 locutor
|
36.44
|
|
τ11 locutor.Trial
|
0.03
|
|
ρ01 locutor
|
-0.53
|
|
ICC
|
0.73
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.229 / 0.789
|
# Saving data: fixed-effect coefficient table of the fitted model, tagged with
# the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "sum_ipu_lgth"
# Profile confidence intervals for the fixed effects only. Requesting them by
# kind (parm = "beta_") replaces the positional slice [5:9, ], which silently
# returns the wrong rows whenever the number of variance parameters or of
# predictors in the model changes.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
# Append this feature's rows to the accumulated results table.
df_overall <- rbind(df_overall, cbind(s, l))

# Saving other features: mean and standard deviation of both tiers, over all
# rows and separately for the "R" and "H" agents.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  stat_row <- paste0("sum_ipu_lgth_", pc)
  value_col <- paste0("data_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Building block shared by the scatterplot: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(
  merres,
  aes(x = data_conv, y = data_part, color = Agent)
) +
  theme_bw()

# Coloured scatterplot with a linear regression line per Agent.
basic_plot +
  geom_point(alpha = 0.3, size = 0.9) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: sum_ipu_lgth Conv",
    y = "VD: sum_ipu_lgth Part",
    color = "Agent"
  )

# Second plot: scatter with 2D density contours, both axes running from 0 to
# the observed maximum, plus marginal density curves per Agent.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: sum_ipu_lgth Conv",
    y = "VD: sum_ipu_lgth Part",
    color = "Agent"
  )
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

qt_discourse
# Density-scaled histogram of qt_discourse with an overlaid density curve,
# coloured by Agent, one facet row per tier.
ggplot(data, aes(x = qt_discourse, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = 0.2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of qt_discourse per trial label, coloured by Agent.
ggplot(data, aes(x = Trial2, y = qt_discourse, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Bars at the per-Agent mean with error bars spanning mean +/- one standard
# deviation, one panel per tier.
ggplot(
  data,
  aes(x = Agent, fill = Agent, y = qt_discourse)
) +
  stat_summary(fun.y = mean, geom = "bar") +
  stat_summary(
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x),
    geom = "errorbar",
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "qt_discourse")

# Merged data: take qt_discourse from the conversant rows (data_conv) and from
# the participant rows (data_part), then join both on locutor, Trial and
# Agent.
temp1 <- subset(
  data,
  subset = tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "qt_discourse")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  subset = tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "qt_discourse")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))

# Mixed model: data_part on data_conv, Agent, their interaction and Trial,
# with a random intercept and random Trial slope for each locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: 2212.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1510 -0.6998 -0.1189 0.6175 3.7284
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 2.13485 1.4611
## Trial 0.01509 0.1228 -0.45
## Residual 4.07850 2.0195
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.36665 0.42192 7.979
## data_conv -0.12356 0.05071 -2.437
## AgentR -0.48220 0.28817 -1.673
## Trial 0.12076 0.03759 3.213
## data_conv:AgentR -0.04365 0.15102 -0.289
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.469
## AgentR -0.449 0.706
## Trial -0.466 -0.041 -0.061
## dt_cnv:AgnR 0.099 -0.306 -0.534 0.104
# Render the sjPlot summary table of the fitted model, titled with the feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", "qt_discourse")
)
part ~ conv qt_discourse
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
3.37
|
2.54 – 4.19
|
<0.001
|
|
data_conv
|
-0.12
|
-0.22 – -0.02
|
0.015
|
|
Agent [R]
|
-0.48
|
-1.05 – 0.08
|
0.094
|
|
Trial
|
0.12
|
0.05 – 0.19
|
0.001
|
|
data_conv * Agent [R]
|
-0.04
|
-0.34 – 0.25
|
0.773
|
|
Random Effects
|
|
σ2
|
4.08
|
|
τ00 locutor
|
2.13
|
|
τ11 locutor.Trial
|
0.02
|
|
ρ01 locutor
|
-0.45
|
|
ICC
|
0.31
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.040 / 0.342
|
# Saving data: fixed-effect coefficient table of the fitted model, tagged with
# the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "qt_discourse"
# Profile confidence intervals for the fixed effects only. Requesting them by
# kind (parm = "beta_") replaces the positional slice [5:9, ], which silently
# returns the wrong rows whenever the number of variance parameters or of
# predictors in the model changes.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
# Append this feature's rows to the accumulated results table.
df_overall <- rbind(df_overall, cbind(s, l))

# Saving other features: mean and standard deviation of both tiers, over all
# rows and separately for the "R" and "H" agents.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  stat_row <- paste0("qt_discourse_", pc)
  value_col <- paste0("data_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Building block shared by the scatterplot: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(
  merres,
  aes(x = data_conv, y = data_part, color = Agent)
) +
  theme_bw()

# Coloured scatterplot with a linear regression line per Agent.
basic_plot +
  geom_point(alpha = 0.3, size = 0.9) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: qt_discourse Conv",
    y = "VD: qt_discourse Part",
    color = "Agent"
  )

# Second plot: scatter with 2D density contours, both axes running from 0 to
# the observed maximum, plus marginal density curves per Agent.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: qt_discourse Conv",
    y = "VD: qt_discourse Part",
    color = "Agent"
  )
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

qt_feedback
# Density-scaled histogram of qt_feedback with an overlaid density curve,
# coloured by Agent, one facet row per tier.
ggplot(data, aes(x = qt_feedback, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = 0.2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplots of qt_feedback per trial label, coloured by Agent.
ggplot(data, aes(x = Trial2, y = qt_feedback, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Bars at the per-Agent mean with error bars spanning mean +/- one standard
# deviation, one panel per tier.
ggplot(
  data,
  aes(x = Agent, fill = Agent, y = qt_feedback)
) +
  stat_summary(fun.y = mean, geom = "bar") +
  stat_summary(
    fun.ymin = function(x) mean(x) - sd(x),
    fun.ymax = function(x) mean(x) + sd(x),
    geom = "errorbar",
    width = 0.25
  ) +
  facet_wrap(~tier) +
  labs(x = "Agent", y = "qt_feedback")

# Merged data: take qt_feedback from the conversant rows (data_conv) and from
# the participant rows (data_part), then join both on locutor, Trial and
# Agent.
temp1 <- subset(
  data,
  subset = tier == "conversant",
  select = c("locutor", "conv_id_unif", "Agent", "qt_feedback")
)
names(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
temp2 <- subset(
  data,
  subset = tier == "participant",
  select = c("locutor", "conv_id_unif", "Agent", "qt_feedback")
)
names(temp2) <- c("locutor", "Trial", "Agent", "data_part")
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))

# Mixed model: data_part on data_conv, Agent, their interaction and Trial,
# with a random intercept and random Trial slope for each locutor.
mdl <- lmer(
  "data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)",
  data = merres
)
## boundary (singular) fit: see ?isSingular
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: 2452.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.5178 -0.6009 -0.0654 0.5377 5.5439
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 4.1391928 2.03450
## Trial 0.0007844 0.02801 -1.00
## Residual 6.7044221 2.58929
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.51675 0.66072 9.863
## data_conv 0.06761 0.04592 1.472
## AgentR -1.92219 0.53546 -3.590
## Trial -0.09987 0.03425 -2.916
## data_conv:AgentR -0.06397 0.10826 -0.591
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.640
## AgentR -0.542 0.736
## Trial -0.472 0.126 0.077
## dt_cnv:AgnR 0.252 -0.406 -0.776 -0.024
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Render the sjPlot summary table of the fitted model, titled with the feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", "qt_feedback")
)
part ~ conv qt_feedback
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
6.52
|
5.22 – 7.81
|
<0.001
|
|
data_conv
|
0.07
|
-0.02 – 0.16
|
0.141
|
|
Agent [R]
|
-1.92
|
-2.97 – -0.87
|
<0.001
|
|
Trial
|
-0.10
|
-0.17 – -0.03
|
0.004
|
|
data_conv * Agent [R]
|
-0.06
|
-0.28 – 0.15
|
0.555
|
|
Random Effects
|
|
σ2
|
6.70
|
|
τ00 locutor
|
4.14
|
|
τ11 locutor.Trial
|
0.00
|
|
ρ01 locutor
|
-1.00
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.205 / NA
|
# Saving data: fixed-effect coefficient table of the fitted model, tagged with
# the feature name.
s <- data.frame(summary(mdl)[["coefficients"]])
s$Feature <- "qt_feedback"
# Profile confidence intervals for the fixed effects only. Requesting them by
# kind (parm = "beta_") replaces the positional slice [5:9, ], which silently
# returns the wrong rows whenever the number of variance parameters or of
# predictors in the model changes.
l <- data.frame(suppressWarnings(confint(mdl, parm = "beta_")))
## Computing profile confidence intervals ...
# Append this feature's rows to the accumulated results table.
df_overall <- rbind(df_overall, cbind(s, l))

# Saving other features: mean and standard deviation of both tiers, over all
# rows and separately for the "R" and "H" agents.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c("part", "conv")) {
  stat_row <- paste0("qt_feedback_", pc)
  value_col <- paste0("data_", pc)
  df2[stat_row, "mean"] <- mean(merres[[value_col]])
  df2[stat_row, "std"] <- sd(merres[[value_col]])
  df2[stat_row, "mean_r"] <- mean(data_r[[value_col]])
  df2[stat_row, "std_r"] <- sd(data_r[[value_col]])
  df2[stat_row, "mean_h"] <- mean(data_h[[value_col]])
  df2[stat_row, "std_h"] <- sd(data_h[[value_col]])
}
# Building block shared by the scatterplot: conversant value on x, participant
# value on y, coloured by Agent.
basic_plot <- ggplot(
  merres,
  aes(x = data_conv, y = data_part, color = Agent)
) +
  theme_bw()

# Coloured scatterplot with a linear regression line per Agent.
basic_plot +
  geom_point(alpha = 0.3, size = 0.9) +
  geom_smooth(method = "lm") +
  labs(
    x = "VI: qt_feedback Conv",
    y = "VD: qt_feedback Part",
    color = "Agent"
  )

# Second plot: scatter with 2D density contours, both axes running from 0 to
# the observed maximum, plus marginal density curves per Agent.
g <- ggplot(merres, aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  xlim(0, max(merres$data_conv)) +
  ylim(0, max(merres$data_part)) +
  labs(
    x = "VI: qt_feedback Conv",
    y = "VD: qt_feedback Part",
    color = "Agent"
  )
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

qt_filled_pause
# Density-scaled histogram with a kernel density overlay, one facet row per tier.
# NOTE(review): `..density..` is deprecated in recent ggplot2 in favour of
# after_stat(density); kept so the script still runs on older ggplot2 - confirm
# the installed version before switching.
ggplot(data, aes(x = qt_filled_pause, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot of the feature per trial, coloured by Agent, one facet row per tier.
ggplot(data, aes(x = Trial2, y = qt_filled_pause, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Mean (bar) with mean +/- one sd (error bar) per Agent, one panel per tier.
# The summaries go through `fun.data` instead of the deprecated
# `fun.y` / `fun.ymin` / `fun.ymax` arguments, so the same call works on
# ggplot2 releases from before and after those arguments were renamed.
ggplot(data,
       aes(x = Agent,
           fill = Agent,
           y = qt_filled_pause)) +
  stat_summary(fun.data = function(x) data.frame(y = mean(x)),
               geom = "bar") +
  stat_summary(fun.data = function(x) data.frame(ymin = mean(x) - sd(x),
                                                 ymax = mean(x) + sd(x)),
               geom = "errorbar",
               width = 0.25) +
  facet_wrap(~tier) +
  labs(x = "Agent",
       y = 'qt_filled_pause')

# creating merged data - ling
# Conversant-tier values of the feature, keyed by locutor, trial and Agent.
temp1 <- subset(data,
                subset = tier == 'conversant',
                select = c("locutor", "conv_id_unif", "Agent", 'qt_filled_pause'))
colnames(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
# Participant-tier values of the same feature, with the same keys.
temp2 <- subset(data,
                subset = tier == 'participant',
                select = c("locutor", "conv_id_unif", "Agent", 'qt_filled_pause'))
colnames(temp2) <- c("locutor", "Trial", "Agent", "data_part")
# Join on the keys: keeps key combinations found in both tiers.
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# The formula is given as a formula object (what lmer documents for `formula`)
# rather than as a character string that has to be coerced.
mdl <- lmer(data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor),
            data = merres)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: 2016.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0423 -0.5926 -0.1137 0.5403 3.4174
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 3.0599 1.74925
## Trial 0.0076 0.08718 0.04
## Residual 2.6686 1.63357
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.73863 0.47950 5.711
## data_conv -0.07779 0.04332 -1.796
## AgentR -0.08165 0.27987 -0.292
## Trial 0.03905 0.02854 1.369
## data_conv:AgentR -0.12397 0.24630 -0.503
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.511
## AgentR -0.511 0.838
## Trial -0.191 0.065 0.067
## dt_cnv:AgnR 0.111 -0.170 -0.305 -0.085
# Tabulate the fitted model with sjPlot; the title names the modelled feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", 'qt_filled_pause')
)
part ~ conv qt_filled_pause
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
2.74
|
1.80 – 3.68
|
<0.001
|
|
data_conv
|
-0.08
|
-0.16 – 0.01
|
0.073
|
|
Agent [R]
|
-0.08
|
-0.63 – 0.47
|
0.770
|
|
Trial
|
0.04
|
-0.02 – 0.09
|
0.171
|
|
data_conv * Agent [R]
|
-0.12
|
-0.61 – 0.36
|
0.615
|
|
Random Effects
|
|
σ2
|
2.67
|
|
τ00 locutor
|
3.06
|
|
τ11 locutor.Trial
|
0.01
|
|
ρ01 locutor
|
0.04
|
|
ICC
|
0.56
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.011 / 0.569
|
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the feature.
s <- data.frame(summary(mdl)[['coefficients']])
s$Feature <- 'qt_filled_pause'
# Confidence intervals for the fixed effects. Rows are selected by coefficient
# name instead of the hard-coded positions 5:9, which silently assume how many
# variance-parameter rows confint() lists before the fixed effects.
# suppressWarnings() hides any warnings raised while the intervals are computed.
l <- data.frame(suppressWarnings(confint(mdl))[rownames(s), , drop = FALSE])
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))
# saving other features
# Mean / sd of both tiers: all rows, Agent "R" rows only, Agent "H" rows only.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c('part', 'conv')) {
  stat_row <- paste0('qt_filled_pause_', pc)  # row of df2 being filled
  stat_col <- paste0('data_', pc)             # column of merres being summarised
  df2[stat_row, 'mean'] <- mean(merres[[stat_col]])
  df2[stat_row, 'std'] <- sd(merres[[stat_col]])
  df2[stat_row, 'mean_r'] <- mean(data_r[[stat_col]])
  df2[stat_row, 'std_r'] <- sd(data_r[[stat_col]])
  df2[stat_row, 'mean_h'] <- mean(data_h[[stat_col]])
  df2[stat_row, 'std_h'] <- sd(data_h[[stat_col]])
}
# Setting up the building blocks
# Base layer: conversant value on x, participant value on y, coloured by Agent.
basic_plot <- ggplot(data = merres,
                     mapping = aes(x = data_conv,
                                   y = data_part,
                                   color = Agent)) +
  theme_bw()
# Colored scatterplot and regression lines
basic_plot +
  geom_point(size = .9, alpha = .3) +
  geom_smooth(method = "lm") +
  labs(color = "Agent",
       x = "VI: qt_filled_pause Conv",
       y = "VD: qt_filled_pause Part")

# second plot
# Scatter with 2D density contours, both axes running from 0 to the observed
# maximum; marginal densities per Agent are added by ggMarginal.
g <- ggplot(data = merres,
            mapping = aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  lims(x = c(0, max(merres$data_conv)),
       y = c(0, max(merres$data_part))) +
  labs(color = "Agent",
       x = "VI: qt_filled_pause Conv",
       y = "VD: qt_filled_pause Part")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)
## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive
## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive

ratio_discourse
# Density-scaled histogram with a kernel density overlay, one facet row per tier.
# NOTE(review): `..density..` is deprecated in recent ggplot2 in favour of
# after_stat(density); kept so the script still runs on older ggplot2 - confirm
# the installed version before switching.
ggplot(data, aes(x = ratio_discourse, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot of the feature per trial, coloured by Agent, one facet row per tier.
ggplot(data, aes(x = Trial2, y = ratio_discourse, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Mean (bar) with mean +/- one sd (error bar) per Agent, one panel per tier.
# The summaries go through `fun.data` instead of the deprecated
# `fun.y` / `fun.ymin` / `fun.ymax` arguments, so the same call works on
# ggplot2 releases from before and after those arguments were renamed.
ggplot(data,
       aes(x = Agent,
           fill = Agent,
           y = ratio_discourse)) +
  stat_summary(fun.data = function(x) data.frame(y = mean(x)),
               geom = "bar") +
  stat_summary(fun.data = function(x) data.frame(ymin = mean(x) - sd(x),
                                                 ymax = mean(x) + sd(x)),
               geom = "errorbar",
               width = 0.25) +
  facet_wrap(~tier) +
  labs(x = "Agent",
       y = 'ratio_discourse')

# creating merged data - ling
# Conversant-tier values of the feature, keyed by locutor, trial and Agent.
temp1 <- subset(data,
                subset = tier == 'conversant',
                select = c("locutor", "conv_id_unif", "Agent", 'ratio_discourse'))
colnames(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
# Participant-tier values of the same feature, with the same keys.
temp2 <- subset(data,
                subset = tier == 'participant',
                select = c("locutor", "conv_id_unif", "Agent", 'ratio_discourse'))
colnames(temp2) <- c("locutor", "Trial", "Agent", "data_part")
# Join on the keys: keeps key combinations found in both tiers.
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# The formula is given as a formula object (what lmer documents for `formula`)
# rather than as a character string that has to be coerced.
mdl <- lmer(data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor),
            data = merres)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: -2179.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.0186 -0.7284 -0.0450 0.5498 4.4077
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 9.029e-05 0.0095019
## Trial 6.985e-07 0.0008357 -0.53
## Residual 6.793e-04 0.0260635
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.0402339 0.0043781 9.190
## data_conv -0.0349661 0.0712201 -0.491
## AgentR 0.0026618 0.0040547 0.656
## Trial 0.0007880 0.0003843 2.050
## data_conv:AgentR 0.0382356 0.1070018 0.357
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.675
## AgentR -0.650 0.757
## Trial -0.456 -0.053 -0.065
## dt_cnv:AgnR 0.422 -0.667 -0.740 0.095
# Tabulate the fitted model with sjPlot; the title names the modelled feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", 'ratio_discourse')
)
part ~ conv ratio_discourse
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
0.04
|
0.03 – 0.05
|
<0.001
|
|
data_conv
|
-0.03
|
-0.17 – 0.10
|
0.623
|
|
Agent [R]
|
0.00
|
-0.01 – 0.01
|
0.512
|
|
Trial
|
0.00
|
0.00 – 0.00
|
0.040
|
|
data_conv * Agent [R]
|
0.04
|
-0.17 – 0.25
|
0.721
|
|
Random Effects
|
|
σ2
|
0.00
|
|
τ00 locutor
|
0.00
|
|
τ11 locutor.Trial
|
0.00
|
|
ρ01 locutor
|
-0.53
|
|
ICC
|
0.10
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.016 / 0.112
|
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the feature.
s <- data.frame(summary(mdl)[['coefficients']])
s$Feature <- 'ratio_discourse'
# Confidence intervals for the fixed effects. Rows are selected by coefficient
# name instead of the hard-coded positions 5:9, which silently assume how many
# variance-parameter rows confint() lists before the fixed effects.
# suppressWarnings() hides any warnings raised while the intervals are computed.
l <- data.frame(suppressWarnings(confint(mdl))[rownames(s), , drop = FALSE])
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))
# saving other features
# Mean / sd of both tiers: all rows, Agent "R" rows only, Agent "H" rows only.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c('part', 'conv')) {
  stat_row <- paste0('ratio_discourse_', pc)  # row of df2 being filled
  stat_col <- paste0('data_', pc)             # column of merres being summarised
  df2[stat_row, 'mean'] <- mean(merres[[stat_col]])
  df2[stat_row, 'std'] <- sd(merres[[stat_col]])
  df2[stat_row, 'mean_r'] <- mean(data_r[[stat_col]])
  df2[stat_row, 'std_r'] <- sd(data_r[[stat_col]])
  df2[stat_row, 'mean_h'] <- mean(data_h[[stat_col]])
  df2[stat_row, 'std_h'] <- sd(data_h[[stat_col]])
}
# Setting up the building blocks
# Base layer: conversant value on x, participant value on y, coloured by Agent.
basic_plot <- ggplot(data = merres,
                     mapping = aes(x = data_conv,
                                   y = data_part,
                                   color = Agent)) +
  theme_bw()
# Colored scatterplot and regression lines
basic_plot +
  geom_point(size = .9, alpha = .3) +
  geom_smooth(method = "lm") +
  labs(color = "Agent",
       x = "VI: ratio_discourse Conv",
       y = "VD: ratio_discourse Part")

# second plot
# Scatter with 2D density contours, both axes running from 0 to the observed
# maximum; marginal densities per Agent are added by ggMarginal.
g <- ggplot(data = merres,
            mapping = aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  lims(x = c(0, max(merres$data_conv)),
       y = c(0, max(merres$data_part))) +
  labs(color = "Agent",
       x = "VI: ratio_discourse Conv",
       y = "VD: ratio_discourse Part")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

nratio_feedback
# Density-scaled histogram with a kernel density overlay, one facet row per tier.
# NOTE(review): `..density..` is deprecated in recent ggplot2 in favour of
# after_stat(density); kept so the script still runs on older ggplot2 - confirm
# the installed version before switching.
ggplot(data, aes(x = nratio_feedback, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot of the feature per trial, coloured by Agent, one facet row per tier.
ggplot(data, aes(x = Trial2, y = nratio_feedback, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Mean (bar) with mean +/- one sd (error bar) per Agent, one panel per tier.
# The summaries go through `fun.data` instead of the deprecated
# `fun.y` / `fun.ymin` / `fun.ymax` arguments, so the same call works on
# ggplot2 releases from before and after those arguments were renamed.
ggplot(data,
       aes(x = Agent,
           fill = Agent,
           y = nratio_feedback)) +
  stat_summary(fun.data = function(x) data.frame(y = mean(x)),
               geom = "bar") +
  stat_summary(fun.data = function(x) data.frame(ymin = mean(x) - sd(x),
                                                 ymax = mean(x) + sd(x)),
               geom = "errorbar",
               width = 0.25) +
  facet_wrap(~tier) +
  labs(x = "Agent",
       y = 'nratio_feedback')

# creating merged data - ling
# Conversant-tier values of the feature, keyed by locutor, trial and Agent.
temp1 <- subset(data,
                subset = tier == 'conversant',
                select = c("locutor", "conv_id_unif", "Agent", 'nratio_feedback'))
colnames(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
# Participant-tier values of the same feature, with the same keys.
temp2 <- subset(data,
                subset = tier == 'participant',
                select = c("locutor", "conv_id_unif", "Agent", 'nratio_feedback'))
colnames(temp2) <- c("locutor", "Trial", "Agent", "data_part")
# Join on the keys: keeps key combinations found in both tiers.
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# The formula is given as a formula object (what lmer documents for `formula`)
# rather than as a character string that has to be coerced.
# NOTE(review): the recorded run reports a singular fit for this model; the
# (1 + Trial | locutor) random-effects structure may be richer than the data
# supports - confirm before interpreting the variance components.
mdl <- lmer(data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor),
            data = merres)
## boundary (singular) fit: see ?isSingular
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: -516.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.6981 -0.6881 -0.0724 0.6373 3.7800
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 5.539e-03 0.074425
## Trial 2.052e-06 0.001432 -1.00
## Residual 1.838e-02 0.135570
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.420656 0.030614 13.741
## data_conv -0.109171 0.059408 -1.838
## AgentR -0.141563 0.030281 -4.675
## Trial -0.005980 0.001782 -3.355
## data_conv:AgentR -0.001098 0.086784 -0.013
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.736
## AgentR -0.638 0.750
## Trial -0.355 -0.063 -0.025
## dt_cnv:AgnR 0.499 -0.664 -0.892 0.009
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Tabulate the fitted model with sjPlot; the title names the modelled feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", 'nratio_feedback')
)
part ~ conv nratio_feedback
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
0.42
|
0.36 – 0.48
|
<0.001
|
|
data_conv
|
-0.11
|
-0.23 – 0.01
|
0.066
|
|
Agent [R]
|
-0.14
|
-0.20 – -0.08
|
<0.001
|
|
Trial
|
-0.01
|
-0.01 – -0.00
|
0.001
|
|
data_conv * Agent [R]
|
-0.00
|
-0.17 – 0.17
|
0.990
|
|
Random Effects
|
|
σ2
|
0.02
|
|
τ00 locutor
|
0.01
|
|
τ11 locutor.Trial
|
0.00
|
|
ρ01 locutor
|
-1.00
|
|
ICC
|
0.20
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.172 / 0.333
|
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the feature.
s <- data.frame(summary(mdl)[['coefficients']])
s$Feature <- 'nratio_feedback'
# Confidence intervals for the fixed effects. Rows are selected by coefficient
# name instead of the hard-coded positions 5:9, which silently assume how many
# variance-parameter rows confint() lists before the fixed effects.
# suppressWarnings() hides any warnings raised while the intervals are computed.
l <- data.frame(suppressWarnings(confint(mdl))[rownames(s), , drop = FALSE])
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))
# saving other features
# Mean / sd of both tiers: all rows, Agent "R" rows only, Agent "H" rows only.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c('part', 'conv')) {
  stat_row <- paste0('nratio_feedback_', pc)  # row of df2 being filled
  stat_col <- paste0('data_', pc)             # column of merres being summarised
  df2[stat_row, 'mean'] <- mean(merres[[stat_col]])
  df2[stat_row, 'std'] <- sd(merres[[stat_col]])
  df2[stat_row, 'mean_r'] <- mean(data_r[[stat_col]])
  df2[stat_row, 'std_r'] <- sd(data_r[[stat_col]])
  df2[stat_row, 'mean_h'] <- mean(data_h[[stat_col]])
  df2[stat_row, 'std_h'] <- sd(data_h[[stat_col]])
}
# Setting up the building blocks
# Base layer: conversant value on x, participant value on y, coloured by Agent.
basic_plot <- ggplot(data = merres,
                     mapping = aes(x = data_conv,
                                   y = data_part,
                                   color = Agent)) +
  theme_bw()
# Colored scatterplot and regression lines
basic_plot +
  geom_point(size = .9, alpha = .3) +
  geom_smooth(method = "lm") +
  labs(color = "Agent",
       x = "VI: nratio_feedback Conv",
       y = "VD: nratio_feedback Part")

# second plot
# Scatter with 2D density contours, both axes running from 0 to the observed
# maximum; marginal densities per Agent are added by ggMarginal.
g <- ggplot(data = merres,
            mapping = aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  lims(x = c(0, max(merres$data_conv)),
       y = c(0, max(merres$data_part))) +
  labs(color = "Agent",
       x = "VI: nratio_feedback Conv",
       y = "VD: nratio_feedback Part")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

ratio_filled_pause
# Density-scaled histogram with a kernel density overlay, one facet row per tier.
# NOTE(review): `..density..` is deprecated in recent ggplot2 in favour of
# after_stat(density); kept so the script still runs on older ggplot2 - confirm
# the installed version before switching.
ggplot(data, aes(x = ratio_filled_pause, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot of the feature per trial, coloured by Agent, one facet row per tier.
ggplot(data, aes(x = Trial2, y = ratio_filled_pause, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Mean (bar) with mean +/- one sd (error bar) per Agent, one panel per tier.
# The summaries go through `fun.data` instead of the deprecated
# `fun.y` / `fun.ymin` / `fun.ymax` arguments, so the same call works on
# ggplot2 releases from before and after those arguments were renamed.
ggplot(data,
       aes(x = Agent,
           fill = Agent,
           y = ratio_filled_pause)) +
  stat_summary(fun.data = function(x) data.frame(y = mean(x)),
               geom = "bar") +
  stat_summary(fun.data = function(x) data.frame(ymin = mean(x) - sd(x),
                                                 ymax = mean(x) + sd(x)),
               geom = "errorbar",
               width = 0.25) +
  facet_wrap(~tier) +
  labs(x = "Agent",
       y = 'ratio_filled_pause')

# creating merged data - ling
# Conversant-tier values of the feature, keyed by locutor, trial and Agent.
temp1 <- subset(data,
                subset = tier == 'conversant',
                select = c("locutor", "conv_id_unif", "Agent", 'ratio_filled_pause'))
colnames(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
# Participant-tier values of the same feature, with the same keys.
temp2 <- subset(data,
                subset = tier == 'participant',
                select = c("locutor", "conv_id_unif", "Agent", 'ratio_filled_pause'))
colnames(temp2) <- c("locutor", "Trial", "Agent", "data_part")
# Join on the keys: keeps key combinations found in both tiers.
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# The formula is given as a formula object (what lmer documents for `formula`)
# rather than as a character string that has to be coerced.
# NOTE(review): the recorded run reports a singular fit for this model; the
# (1 + Trial | locutor) random-effects structure may be richer than the data
# supports - confirm before interpreting the variance components.
mdl <- lmer(data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor),
            data = merres)
## boundary (singular) fit: see ?isSingular
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: -2211.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7298 -0.6161 -0.1710 0.5228 4.0181
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 5.145e-04 2.268e-02
## Trial 3.793e-09 6.159e-05 -1.00
## Residual 5.974e-04 2.444e-02
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 3.042e-02 6.310e-03 4.821
## data_conv 3.437e-02 4.658e-02 0.738
## AgentR 9.434e-03 3.766e-03 2.505
## Trial -8.149e-05 3.191e-04 -0.255
## data_conv:AgentR -6.087e-02 1.352e-01 -0.450
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.499
## AgentR -0.502 0.796
## Trial -0.353 0.090 0.092
## dt_cnv:AgnR 0.209 -0.359 -0.453 -0.140
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# Tabulate the fitted model with sjPlot; the title names the modelled feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", 'ratio_filled_pause')
)
part ~ conv ratio_filled_pause
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
0.03
|
0.02 – 0.04
|
<0.001
|
|
data_conv
|
0.03
|
-0.06 – 0.13
|
0.461
|
|
Agent [R]
|
0.01
|
0.00 – 0.02
|
0.012
|
|
Trial
|
-0.00
|
-0.00 – 0.00
|
0.798
|
|
data_conv * Agent [R]
|
-0.06
|
-0.33 – 0.20
|
0.653
|
|
Random Effects
|
|
σ2
|
0.00
|
|
τ00 locutor
|
0.00
|
|
τ11 locutor.Trial
|
0.00
|
|
ρ01 locutor
|
-1.00
|
|
ICC
|
0.46
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.012 / 0.462
|
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the feature.
s <- data.frame(summary(mdl)[['coefficients']])
s$Feature <- 'ratio_filled_pause'
# Confidence intervals for the fixed effects. Rows are selected by coefficient
# name instead of the hard-coded positions 5:9, which silently assume how many
# variance-parameter rows confint() lists before the fixed effects.
# suppressWarnings() hides any warnings raised while the intervals are computed.
l <- data.frame(suppressWarnings(confint(mdl))[rownames(s), , drop = FALSE])
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))
# saving other features
# Mean / sd of both tiers: all rows, Agent "R" rows only, Agent "H" rows only.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c('part', 'conv')) {
  stat_row <- paste0('ratio_filled_pause_', pc)  # row of df2 being filled
  stat_col <- paste0('data_', pc)                # column of merres being summarised
  df2[stat_row, 'mean'] <- mean(merres[[stat_col]])
  df2[stat_row, 'std'] <- sd(merres[[stat_col]])
  df2[stat_row, 'mean_r'] <- mean(data_r[[stat_col]])
  df2[stat_row, 'std_r'] <- sd(data_r[[stat_col]])
  df2[stat_row, 'mean_h'] <- mean(data_h[[stat_col]])
  df2[stat_row, 'std_h'] <- sd(data_h[[stat_col]])
}
# Setting up the building blocks
# Base layer: conversant value on x, participant value on y, coloured by Agent.
basic_plot <- ggplot(data = merres,
                     mapping = aes(x = data_conv,
                                   y = data_part,
                                   color = Agent)) +
  theme_bw()
# Colored scatterplot and regression lines
basic_plot +
  geom_point(size = .9, alpha = .3) +
  geom_smooth(method = "lm") +
  labs(color = "Agent",
       x = "VI: ratio_filled_pause Conv",
       y = "VD: ratio_filled_pause Part")

# second plot
# Scatter with 2D density contours, both axes running from 0 to the observed
# maximum; marginal densities per Agent are added by ggMarginal.
g <- ggplot(data = merres,
            mapping = aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  lims(x = c(0, max(merres$data_conv)),
       y = c(0, max(merres$data_part))) +
  labs(color = "Agent",
       x = "VI: ratio_filled_pause Conv",
       y = "VD: ratio_filled_pause Part")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)
## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive
## Warning: Computation failed in `stat_density2d()`:
## bandwidths must be strictly positive

mean_ipu_lgth
# Density-scaled histogram with a kernel density overlay, one facet row per tier.
# NOTE(review): `..density..` is deprecated in recent ggplot2 in favour of
# after_stat(density); kept so the script still runs on older ggplot2 - confirm
# the installed version before switching.
ggplot(data, aes(x = mean_ipu_lgth, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot of the feature per trial, coloured by Agent, one facet row per tier.
ggplot(data, aes(x = Trial2, y = mean_ipu_lgth, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Mean (bar) with mean +/- one sd (error bar) per Agent, one panel per tier.
# The summaries go through `fun.data` instead of the deprecated
# `fun.y` / `fun.ymin` / `fun.ymax` arguments, so the same call works on
# ggplot2 releases from before and after those arguments were renamed.
ggplot(data,
       aes(x = Agent,
           fill = Agent,
           y = mean_ipu_lgth)) +
  stat_summary(fun.data = function(x) data.frame(y = mean(x)),
               geom = "bar") +
  stat_summary(fun.data = function(x) data.frame(ymin = mean(x) - sd(x),
                                                 ymax = mean(x) + sd(x)),
               geom = "errorbar",
               width = 0.25) +
  facet_wrap(~tier) +
  labs(x = "Agent",
       y = 'mean_ipu_lgth')

# creating merged data - ling
# Conversant-tier values of the feature, keyed by locutor, trial and Agent.
temp1 <- subset(data,
                subset = tier == 'conversant',
                select = c("locutor", "conv_id_unif", "Agent", 'mean_ipu_lgth'))
colnames(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
# Participant-tier values of the same feature, with the same keys.
temp2 <- subset(data,
                subset = tier == 'participant',
                select = c("locutor", "conv_id_unif", "Agent", 'mean_ipu_lgth'))
colnames(temp2) <- c("locutor", "Trial", "Agent", "data_part")
# Join on the keys: keeps key combinations found in both tiers.
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# The formula is given as a formula object (what lmer documents for `formula`)
# rather than as a character string that has to be coerced.
# NOTE(review): the recorded run reports that this model failed to converge
# (degenerate Hessian); the estimates should be re-checked, e.g. with a simpler
# random-effects structure or another optimizer, before they are reported.
mdl <- lmer(data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor),
            data = merres)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: 391.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.3019 -0.6577 -0.0919 0.5407 5.2737
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 4.054e-02 0.201348
## Trial 3.046e-05 0.005519 1.00
## Residual 1.099e-01 0.331460
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 1.519381 0.094101 16.146
## data_conv -0.073846 0.047409 -1.558
## AgentR -0.318601 0.124662 -2.556
## Trial 0.031947 0.004471 7.146
## data_conv:AgentR 0.070538 0.091629 0.770
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.819
## AgentR -0.572 0.646
## Trial -0.028 -0.105 -0.042
## dt_cnv:AgnR 0.438 -0.526 -0.957 0.025
## convergence code: 0
## unable to evaluate scaled gradient
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
# Tabulate the fitted model with sjPlot; the title names the modelled feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", 'mean_ipu_lgth')
)
part ~ conv mean_ipu_lgth
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
1.52
|
1.33 – 1.70
|
<0.001
|
|
data_conv
|
-0.07
|
-0.17 – 0.02
|
0.119
|
|
Agent [R]
|
-0.32
|
-0.56 – -0.07
|
0.011
|
|
Trial
|
0.03
|
0.02 – 0.04
|
<0.001
|
|
data_conv * Agent [R]
|
0.07
|
-0.11 – 0.25
|
0.441
|
|
Random Effects
|
|
σ2
|
0.11
|
|
τ00 locutor
|
0.04
|
|
τ11 locutor.Trial
|
0.00
|
|
ρ01 locutor
|
1.00
|
|
ICC
|
0.33
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.119 / 0.409
|
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the feature.
s <- data.frame(summary(mdl)[['coefficients']])
s$Feature <- 'mean_ipu_lgth'
# Confidence intervals for the fixed effects. Rows are selected by coefficient
# name instead of the hard-coded positions 5:9, which silently assume how many
# variance-parameter rows confint() lists before the fixed effects.
# suppressWarnings() hides any warnings raised while the intervals are computed.
l <- data.frame(suppressWarnings(confint(mdl))[rownames(s), , drop = FALSE])
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))
# saving other features
# Mean / sd of both tiers: all rows, Agent "R" rows only, Agent "H" rows only.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c('part', 'conv')) {
  stat_row <- paste0('mean_ipu_lgth_', pc)  # row of df2 being filled
  stat_col <- paste0('data_', pc)           # column of merres being summarised
  df2[stat_row, 'mean'] <- mean(merres[[stat_col]])
  df2[stat_row, 'std'] <- sd(merres[[stat_col]])
  df2[stat_row, 'mean_r'] <- mean(data_r[[stat_col]])
  df2[stat_row, 'std_r'] <- sd(data_r[[stat_col]])
  df2[stat_row, 'mean_h'] <- mean(data_h[[stat_col]])
  df2[stat_row, 'std_h'] <- sd(data_h[[stat_col]])
}
# Setting up the building blocks
# Base layer: conversant value on x, participant value on y, coloured by Agent.
basic_plot <- ggplot(data = merres,
                     mapping = aes(x = data_conv,
                                   y = data_part,
                                   color = Agent)) +
  theme_bw()
# Colored scatterplot and regression lines
basic_plot +
  geom_point(size = .9, alpha = .3) +
  geom_smooth(method = "lm") +
  labs(color = "Agent",
       x = "VI: mean_ipu_lgth Conv",
       y = "VD: mean_ipu_lgth Part")

# second plot
# Scatter with 2D density contours, both axes running from 0 to the observed
# maximum; marginal densities per Agent are added by ggMarginal.
g <- ggplot(data = merres,
            mapping = aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  lims(x = c(0, max(merres$data_conv)),
       y = c(0, max(merres$data_part))) +
  labs(color = "Agent",
       x = "VI: mean_ipu_lgth Conv",
       y = "VD: mean_ipu_lgth Part")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

speech_rate_min4
# Density-scaled histogram with a kernel density overlay, one facet row per tier.
# NOTE(review): `..density..` is deprecated in recent ggplot2 in favour of
# after_stat(density); kept so the script still runs on older ggplot2 - confirm
# the installed version before switching.
ggplot(data, aes(x = speech_rate_min4, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_histogram(aes(y = ..density..), alpha = 0.5, fill = "white") +
  geom_density(alpha = .2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Boxplot of the feature per trial, coloured by Agent, one facet row per tier.
ggplot(data, aes(x = Trial2, y = speech_rate_min4, color = Agent)) +
  facet_grid(tier ~ .) +
  geom_boxplot()

# Mean (bar) with mean +/- one sd (error bar) per Agent, one panel per tier.
# The summaries go through `fun.data` instead of the deprecated
# `fun.y` / `fun.ymin` / `fun.ymax` arguments, so the same call works on
# ggplot2 releases from before and after those arguments were renamed.
ggplot(data,
       aes(x = Agent,
           fill = Agent,
           y = speech_rate_min4)) +
  stat_summary(fun.data = function(x) data.frame(y = mean(x)),
               geom = "bar") +
  stat_summary(fun.data = function(x) data.frame(ymin = mean(x) - sd(x),
                                                 ymax = mean(x) + sd(x)),
               geom = "errorbar",
               width = 0.25) +
  facet_wrap(~tier) +
  labs(x = "Agent",
       y = 'speech_rate_min4')

# creating merged data - ling
# Conversant-tier values of the feature, keyed by locutor, trial and Agent.
temp1 <- subset(data,
                subset = tier == 'conversant',
                select = c("locutor", "conv_id_unif", "Agent", 'speech_rate_min4'))
colnames(temp1) <- c("locutor", "Trial", "Agent", "data_conv")
# Participant-tier values of the same feature, with the same keys.
temp2 <- subset(data,
                subset = tier == 'participant',
                select = c("locutor", "conv_id_unif", "Agent", 'speech_rate_min4'))
colnames(temp2) <- c("locutor", "Trial", "Agent", "data_part")
# Join on the keys: keeps key combinations found in both tiers.
merres <- merge(temp1, temp2, by = c("locutor", "Trial", "Agent"))
# applying mixed model
# The formula is given as a formula object (what lmer documents for `formula`)
# rather than as a character string that has to be coerced.
# NOTE(review): the recorded run reports that this model failed to converge
# (max|grad| above tolerance); the estimates should be re-checked, e.g. with
# another optimizer or rescaled predictors, before they are reported.
mdl <- lmer(data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor),
            data = merres)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00639831 (tol = 0.002, component 1)
print(summary(mdl))
## Linear mixed model fit by REML ['lmerMod']
## Formula: data_part ~ data_conv * Agent + Trial + (1 + Trial | locutor)
## Data: merres
##
## REML criterion at convergence: 819
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.72952 -0.63845 0.00567 0.60109 2.93797
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## locutor (Intercept) 0.400256 0.63266
## Trial 0.001588 0.03985 -0.47
## Residual 0.238110 0.48797
## Number of obs: 504, groups: locutor, 21
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 4.97586 0.34129 14.579
## data_conv 0.06038 0.05717 1.056
## AgentR 0.84174 0.47147 1.785
## Trial -0.01083 0.01077 -1.006
## data_conv:AgentR -0.20130 0.09157 -2.198
##
## Correlation of Fixed Effects:
## (Intr) dt_cnv AgentR Trial
## data_conv -0.904
## AgentR -0.604 0.664
## Trial -0.144 -0.075 -0.061
## dt_cnv:AgnR 0.562 -0.624 -0.994 0.059
## convergence code: 0
## Model failed to converge with max|grad| = 0.00639831 (tol = 0.002, component 1)
# Tabulate the fitted model with sjPlot; the title names the modelled feature.
tab_model(
  mdl,
  title = paste("part ~ conv ", 'speech_rate_min4')
)
part ~ conv speech_rate_min4
|
|
data_part
|
|
Predictors
|
Estimates
|
CI
|
p
|
|
(Intercept)
|
4.98
|
4.31 – 5.64
|
<0.001
|
|
data_conv
|
0.06
|
-0.05 – 0.17
|
0.291
|
|
Agent [R]
|
0.84
|
-0.08 – 1.77
|
0.074
|
|
Trial
|
-0.01
|
-0.03 – 0.01
|
0.315
|
|
data_conv * Agent [R]
|
-0.20
|
-0.38 – -0.02
|
0.028
|
|
Random Effects
|
|
σ2
|
0.24
|
|
τ00 locutor
|
0.40
|
|
τ11 locutor.Trial
|
0.00
|
|
ρ01 locutor
|
-0.47
|
|
ICC
|
0.59
|
|
N locutor
|
21
|
|
Observations
|
504
|
|
Marginal R2 / Conditional R2
|
0.020 / 0.595
|
# saving data
# Fixed-effect coefficient table of the fitted model, tagged with the feature.
s <- data.frame(summary(mdl)[['coefficients']])
s$Feature <- 'speech_rate_min4'
# Confidence intervals for the fixed effects. Rows are selected by coefficient
# name instead of the hard-coded positions 5:9, which silently assume how many
# variance-parameter rows confint() lists before the fixed effects.
# suppressWarnings() hides any warnings raised while the intervals are computed.
l <- data.frame(suppressWarnings(confint(mdl))[rownames(s), , drop = FALSE])
## Computing profile confidence intervals ...
df_overall <- rbind(df_overall, cbind(s, l))
# saving other features
# Mean / sd of both tiers: all rows, Agent "R" rows only, Agent "H" rows only.
data_r <- merres[which(merres$Agent == "R"), ]
data_h <- merres[which(merres$Agent == "H"), ]
for (pc in c('part', 'conv')) {
  stat_row <- paste0('speech_rate_min4_', pc)  # row of df2 being filled
  stat_col <- paste0('data_', pc)              # column of merres being summarised
  df2[stat_row, 'mean'] <- mean(merres[[stat_col]])
  df2[stat_row, 'std'] <- sd(merres[[stat_col]])
  df2[stat_row, 'mean_r'] <- mean(data_r[[stat_col]])
  df2[stat_row, 'std_r'] <- sd(data_r[[stat_col]])
  df2[stat_row, 'mean_h'] <- mean(data_h[[stat_col]])
  df2[stat_row, 'std_h'] <- sd(data_h[[stat_col]])
}
# Setting up the building blocks
# Base layer: conversant value on x, participant value on y, coloured by Agent.
basic_plot <- ggplot(data = merres,
                     mapping = aes(x = data_conv,
                                   y = data_part,
                                   color = Agent)) +
  theme_bw()
# Colored scatterplot and regression lines
basic_plot +
  geom_point(size = .9, alpha = .3) +
  geom_smooth(method = "lm") +
  labs(color = "Agent",
       x = "VI: speech_rate_min4 Conv",
       y = "VD: speech_rate_min4 Part")

# second plot
# Scatter with 2D density contours, both axes running from 0 to the observed
# maximum; marginal densities per Agent are added by ggMarginal.
g <- ggplot(data = merres,
            mapping = aes(x = data_conv, y = data_part, color = Agent)) +
  geom_point(alpha = 0.7) +
  geom_density_2d(alpha = 0.5) +
  theme(legend.position = "bottom") +
  lims(x = c(0, max(merres$data_conv)),
       y = c(0, max(merres$data_part))) +
  labs(color = "Agent",
       x = "VI: speech_rate_min4 Conv",
       y = "VD: speech_rate_min4 Part")
ggMarginal(g, type = "density", margins = "both", groupColour = TRUE)

Saver
# Output workbook: 'summary.xlsx' next to the script being edited when running
# inside RStudio, otherwise in the current working directory.
if (rstudioapi::isAvailable()) {
  file_path <- file.path(dirname(rstudioapi::getSourceEditorContext()$path), 'summary.xlsx')
} else {
  file_path <- file.path(getwd(), 'summary.xlsx')
}
# Write the first data set in a new workbook.
# append = FALSE starts a fresh workbook; with append = TRUE a re-run against
# an existing summary.xlsx would try to add a second 'models' sheet and fail.
write.xlsx(df_overall, file = file_path,
           sheetName = 'models', append = FALSE)
# Write the other sheets into the workbook created above
write.xlsx(df2, file = file_path,
           sheetName = 'hr_comparison', append = TRUE)